Bray-Curtis dissimilarity matrix analysis (SIMKA)

Analysis done to the Bray-Curtis dissimilarity matrix resulting using the program SIMKA.

Loading Vegan package:

library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-6

Loading the abundance Bray-Curtis matrix. Note: The sample names have been changed from the original in order to interpret them better.

I loaded the matrix twice to set the matrix correctly with the appropiate row names corresponding to the samples (otherwise I get an error).

NMDS

simka_nmds <- monoMDS(simka_tab)
simka_nmds
## 
## Call:
## monoMDS(dist = simka_tab) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'unknown'
## 
## Dimensions: 2 
## Stress:     0.0642279 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 56 iterations: Stress nearly unchanged (ratio > sratmax)
plot(simka_nmds)

stressplot(simka_nmds)

UPGMA

Dendogram generated using UPGMA clustering algorithm:

simka_dis.mtx <- as.dist(simka_tab)
simka_hclust <- hclust(simka_dis.mtx, method = "average")
plot(simka_hclust, main = "SIMKA dendogram")

Heatmap

simka_mtx <- as.matrix(simka_tab)
simka_dendo <- as.dendrogram(simka_hclust)
heatmap(simka_mtx, Rowv = simka_dendo, Colv = "Rowv", symm = TRUE, main = "SIMKA heatmap")


Function abundance tables

Computation for Bray-Curtis dissimilarity matrices for GO, GO-slim and Interpro (IPR) abundance tables.

GO dissimilarity matrix

GO_table <- read.table(file = "/home/srllana/R/Metagenomics_internship/Tables/ERP112966_GO_abundances_v4.1.tsv",
                       header = TRUE, sep = "\t", row.names = 1)
GO_table$description <- NULL
GO_table$category <- NULL
GO_ttable <- t(GO_table)
#GO_ttable <- as.data.frame(GO_ttable)
#head(GO_ttable, 10)
#GO_ttable <- as.matrix(GO_ttable)

#Generating the dissimilarity matrix
GO_bray_dist <- vegdist(GO_ttable, method = "bray")
#head(as.matrix(GO_bray_dist), 5)

GO UPGMA

GO_bray_hclust <- hclust(GO_bray_dist, method = "average")
plot(GO_bray_hclust, main = "GO dendogram")

GO Heatmap

GO_bray_mtx <- as.matrix(GO_bray_dist)
GO_bray_dendo <- as.dendrogram(GO_bray_hclust)
heatmap(GO_bray_mtx, Rowv = GO_bray_dendo, Colv = "Rowv", symm = TRUE, main = "GO heatmap")

GO-slim dissimilarity matrix

GOslim_table <- read.table(file = "/home/srllana/R/Metagenomics_internship/Tables/ERP112966_GO-slim_abundances_v4.1.tsv",
                       header = TRUE, sep = "\t", row.names = 1)
GOslim_table$description <- NULL
GOslim_table$category <- NULL
GOslim_ttable <- t(GOslim_table)
GOslim_ttable <- as.data.frame(GOslim_ttable)
#head(GOslim_ttable, 10)
GOslim_ttable <- as.matrix(GOslim_ttable)

#Generating the dissimilarity matrix
GOslim_bray_dist <- vegdist(GOslim_ttable, method = "bray")
#head(as.matrix(GOslim_bray_dist), 5)

GO-slim UPGMA

GOslim_bray_hclust <- hclust(GOslim_bray_dist, method = "average")
plot(GOslim_bray_hclust, main = "GO-slim dendogram")

GO-slim heatmap

GOslim_bray_mtx <- as.matrix(GOslim_bray_dist)
GOslim_bray_dendo <- as.dendrogram(GOslim_bray_hclust)
heatmap(GOslim_bray_mtx, Rowv = GOslim_bray_dendo, Colv = "Rowv", symm = TRUE, main = "GO-slim heatmap")

Interpro dissimilarity matrix

IPR_table <- read.table(file = "/home/srllana/R/Metagenomics_internship/Tables/ERP112966_IPR_abundances_v4.1.tsv",
                       header = TRUE, sep = "\t", row.names = 1)
IPR_table$description <- NULL
IPR_ttable <- t(IPR_table)
IPR_ttable <- as.data.frame(IPR_ttable)
#head(IPR_ttable, 10)
IPR_ttable <- as.matrix(IPR_ttable)

#Generating the dissimilarity matrix
IPR_bray_dist <- vegdist(IPR_ttable, method = "bray")
#head(as.matrix(IPR_bray_dist), 5)

Interpro UPGMA

IPR_bray_hclust <- hclust(IPR_bray_dist, method = "average")
plot(IPR_bray_hclust, main = "IPR dendogram")

Interpro heatmap

IPR_bray_mtx <- as.matrix(IPR_bray_dist)
IPR_bray_dendo <- as.dendrogram(IPR_bray_hclust)
heatmap(IPR_bray_mtx, Rowv = IPR_bray_dendo, Colv = "Rowv", symm = TRUE, main = "IPR heatmap")


Analysis of Bray-Curtis matrices for the GO abundace table with and without subsampling

Bray-Curtis matrices for the GO table table done without subsampling and subsampling:

The NS dissimilarity matrix is already generated at this point ('GO_bray_dist').

Generating SS_MIN and SS_MEAN distance dissimilarity matrices

In addition to the distance matrices, the dendograms (UPGMA) and heatmaps are also included.

print(paste("Minimum number of reads =", min(rowSums(GO_ttable))))
## [1] "Minimum number of reads = 22506"
GO_table_ss_min <- rrarefy(GO_ttable, 22506)

#Obtain a Bray Curtis dissimilarity matrix:
GO_ss_min_bray_dist <- vegdist(GO_table_ss_min, method = "bray")

#hclust UPGMA
GO_ss_min_hclust <- hclust(GO_ss_min_bray_dist, method = "average")
plot(GO_ss_min_hclust, main = "SS_MIN dendogram")

#Heatmap
GO_ss_min_bray_mtx <- as.matrix(GO_ss_min_bray_dist)
GO_ss_min_dendo <- as.dendrogram(GO_ss_min_hclust)
heatmap(GO_ss_min_bray_mtx, Rowv = GO_ss_min_dendo, Colv = "Rowv", symm = TRUE, main = "SS_MIN heatmap")

print(paste("Mean number of reads =", mean(rowSums(GO_ttable))))
## [1] "Mean number of reads = 361134.78"
GO_table_ss_mean <- rrarefy(GO_ttable, 361135)
## Warning in rrarefy(GO_ttable, 361135): some row sums < 'sample' and are not
## rarefied
#Obtain a Bray Curtis dissimilarity matrix:
GO_ss_mean_bray_dist <- vegdist(GO_table_ss_mean, method = "bray")

#hclust UPGMA
GO_ss_mean_hclust <- hclust(GO_ss_mean_bray_dist, method = "average")
plot(GO_ss_mean_hclust, main = "SS_mean dendogram")

#Heatmap
GO_ss_mean_bray_mtx <- as.matrix(GO_ss_mean_bray_dist)
GO_ss_mean_dendo <- as.dendrogram(GO_ss_mean_hclust)
heatmap(GO_ss_mean_bray_mtx, Rowv = GO_ss_mean_dendo, Colv = "Rowv", symm = TRUE, main = "SS_MEAN heatmap")

Mantel Tests for dissimilarity matrices

plot(GO_ss_min_bray_dist, GO_bray_dist, main = "SS_MIN vs NS")
abline(0,1)

mantel(GO_ss_min_bray_dist, GO_bray_dist)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = GO_ss_min_bray_dist, ydis = GO_bray_dist) 
## 
## Mantel statistic r: 0.7766 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.119 0.160 0.191 0.238 
## Permutation: free
## Number of permutations: 999
plot(GO_ss_mean_bray_dist, GO_bray_dist, main = "SS_MEAN vs NS")
abline(0,1)

mantel(GO_ss_mean_bray_dist, GO_bray_dist)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = GO_ss_mean_bray_dist, ydis = GO_bray_dist) 
## 
## Mantel statistic r: 0.9375 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.121 0.156 0.195 0.255 
## Permutation: free
## Number of permutations: 999
plot(GO_ss_min_bray_dist, GO_ss_mean_bray_dist, main = "SS_MIN vs SS_MEAN")
abline(0,1)

mantel(GO_ss_min_bray_dist, GO_ss_mean_bray_dist)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = GO_ss_min_bray_dist, ydis = GO_ss_mean_bray_dist) 
## 
## Mantel statistic r: 0.8523 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.140 0.185 0.231 0.267 
## Permutation: free
## Number of permutations: 999
plot(GO_ss_min_bray_dist, simka_dis.mtx, main = "SS_MIN vs SIMKA")
abline(0,1)

mantel(GO_ss_min_bray_dist, simka_dis.mtx)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = GO_ss_min_bray_dist, ydis = simka_dis.mtx) 
## 
## Mantel statistic r: -0.04353 
##       Significance: 0.65 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.118 0.161 0.215 0.256 
## Permutation: free
## Number of permutations: 999
plot(GO_ss_mean_bray_dist, simka_dis.mtx, main = "SS_MEAN vs SIMKA")
abline(0,1)

mantel(GO_ss_mean_bray_dist, simka_dis.mtx)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = GO_ss_mean_bray_dist, ydis = simka_dis.mtx) 
## 
## Mantel statistic r: 0.01576 
##       Significance: 0.381 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.114 0.151 0.186 0.221 
## Permutation: free
## Number of permutations: 999
plot(GO_bray_dist, simka_dis.mtx, main = "NS vs SIMKA")
abline(0,1)

mantel(GO_bray_dist, simka_dis.mtx)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = GO_bray_dist, ydis = simka_dis.mtx) 
## 
## Mantel statistic r: -0.007522 
##       Significance: 0.48 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.103 0.140 0.176 0.206 
## Permutation: free
## Number of permutations: 999

Good correlation between the GO dissimilarity matrices, although there is a lot of dispersion in the plots Bad correation between SIMKA and GO dissimilarity matrices

NMDS

GO_ns_bray_nmds <- monoMDS(GO_bray_dist)
plot(GO_ns_bray_nmds, main = "NS NMDS")

GO_ss_min_bray_nmds <- monoMDS(GO_ss_min_bray_dist)
plot(GO_ss_min_bray_nmds, main = "SS_MIN NMDS")

GO_ss_mean_bray_nmds <- monoMDS(GO_ss_mean_bray_dist)
plot(GO_ss_mean_bray_nmds, main = "SS_MEAN NMDS")

plot(simka_nmds, main = "SIMKA NMDS")

We cannot form clear sample clusters of the GO dis. matrices

ANOSIM NS

Function to reorder the rows and columns of a symmetric matrix (from package 'graph4lg'):

reorder_mat <- function(mat, order){
  
  # Number of elements in the vector 'order'
  n <- length(order)
  
  # Check whether 'mat' is a 'matrix'
  if(!inherits(mat, "matrix")){
    stop("'mat' must be a matrix")
    # Check whether 'order' is of class 'character'
  } else if (!inherits(order, "character")){
    stop("'order' must be a character vector")
    # Check whether 'mat' is a symmetric matrix
  } else if(!(isSymmetric(mat))){
    stop("The matrix 'mat' must be symmetric")
    # Check whether 'order' has as many elements as there are rows
    # and columns in 'mat'
  } else if (n != length(colnames(mat))){
    stop("'order' must have as many elements as there are rows and
         columns in 'mat'")
    # Check whether the column names are in the 'order' vector
  } else if(length(which(colnames(mat) %in% order)) != n){
    stop("The column names of the matrix you want to reorder must
         be present in the vector 'order'")
    # Check whether the row names are in the 'order' vector
  } else if (length(which(row.names(mat) %in% order)) != n){
    print("The row names of the matrix you want to reorder must
          be present in the vector 'order'")
  } else {
    
    # Reorder 'mat' according to 'order'
    mat2 <- mat[order, order]
    
    return(mat2)
  }
}

Generating the grups (clusters) and reordering the symmetric matrix (dis. matrix):

GO_ns_samples <- c("D1_S023_100L.m_R01", "D1_S320_716L.m_R00", "D1_S02_1L.s_R01", "D1_S02_10L.s_R03",
                   "D1_S023_10L.m_R01", "D1_S023_100L.m_R02", "D1_S02_10L.s_R01", "D1_S02_2.5L.s_R01.4",
                   "D1_S02_1L.s_R02", "D1_S02_2.5L.s_R01.2", "D1_S023_100L.m_R03", "D1_S023_60L.m_R01",
                   "D1_S023_60L.m_R03", "D1_S023_716L.m_R00", "D1_S023_496L.m_R00", "D1_S02_10L.m_R03",
                   "D1_S023_100L.m_R02.1", "D2_S023_100L.m_R11", "D1_S20_100L.m_R03", "D1_S20_496L.m_R00",
                   "D1_S20_776L.m_R00", "D1_S20_120L.m_R01", "D1_S320_100L.m_R02", "D1_S02_10L.m_R01",
                   "D1_S320_100L.m_R02.1", "D2_S023_1000L.m_R00", "D1_S320_100L.m_R03", "D1_S02_1L.s_R03",
                   "D1_S023_10L.m_R03", "D1_S02_10L.s_R02", "D1_S320_60L.m_R01", "D1_S02_2.5L.s_R01.3",
                   "D1_S02_2.5L.s_R01.1", "D1_S023_10L.m_R02", "D1_S320_60L.m_R03", "D2_S320_1000L.m_R00",
                   "D1_S02_10L.m_R02", "D2_S320_100L.m_R11", "D1_S320_496L.m_R00", "D1_S320_100L.m_R01",
                   "D1_S20_100L.m_R02", "D1_S20_100L.m_R01", "D1_S320_10L.m_R01", "D1_S320_10L.m_R02",
                   "D1_S20_60L.m_R03", "D2_S20_100L.m_R11", "D2_S20_1000L.m_R00", "D1_S320_10L.m_R03",
                   "D1_S20_30L.m_R123", "D1_S20_100L.m_R02.1")

GO_ns_groups <- c(rep("A", 13), rep("B", 5), rep("C", 3), rep("D", 4), rep("E", 4), rep("F", 5), rep("G", 6), rep("H", 2), rep("I", 2), rep("J", 3), rep("K", 3))

GO_ns_GroupedSamples <- data.frame(GO_ns_samples, GO_ns_groups, row.names = 1)

GO_bray_mtx_reordered <- reorder_mat(GO_bray_mtx, GO_ns_samples)

ANOSIM Test:

GO_ns_anosim <- anosim(GO_bray_mtx_reordered, GO_ns_GroupedSamples$GO_ns_groups, permutations = 999, distance = "bray", NULL)
par(cex=1, mar=c(5, 5, 5, 5))
plot(GO_ns_anosim, main="NS ANOSIM")
## Warning in bxp(structure(list(stats = structure(c(30, 407, 681, 953, 1225, :
## some notches went outside hinges ('box'): maybe set notch=FALSE

summary(GO_ns_anosim) #ANOSIM significant
## 
## Call:
## anosim(x = GO_bray_mtx_reordered, grouping = GO_ns_GroupedSamples$GO_ns_groups,      permutations = 999, distance = "bray", strata = NULL) 
## Dissimilarity: user supplied square matrix 
## 
## ANOSIM statistic R: 0.9633 
##       Significance: 0.001 
## 
## Permutation: free
## Number of permutations: 999
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0985 0.1306 0.1736 0.2083 
## 
## Dissimilarity ranks between and within classes:
##          0%    25%   50%    75% 100%    N
## Between  30 407.00 681.0 953.00 1225 1089
## A         2  31.25  64.0 110.75  213   78
## B         8  28.00  65.5 105.75  188   10
## C        60  82.50 105.0 131.50  158    3
## D        22  75.00 117.0 157.50  179    6
## E        33  51.00  79.0  97.25  138    6
## F         4  15.50  32.0  48.75   62   10
## G         1  67.50  77.0 112.50  181   15
## H        93  93.00  93.0  93.00   93    1
## I       245 245.00 245.0 245.00  245    1
## J       165 167.00 169.0 240.00  311    3
## K       281 365.00 449.0 513.50  578    3

ANOSIM significant

ANOSIM SIMKA

Generating the grups (clusters) and reordering the symmetric matrix (dis. matrix):

Simka_samples <- c("D1_S20_100L.m_R02.1", "D2_S20_1000L.m_R00", "D2_S20_100L.m_R11", "D1_S20_30L.m_R123",
                   "D1_S20_776L.m_R00", "D1_S20_60L.m_R03", "D1_S20_100L.m_R02", "D1_S20_496L.m_R00",
                   "D1_S20_100L.m_R01", "D1_S20_120L.m_R01", "D1_S20_100L.m_R03", "D2_S320_1000L.m_R00",
                   "D2_S320_100L.m_R11", "D1_S320_496L.m_R00", "D1_S02_10L.m_R01", "D1_S02_10L.m_R02",
                   "D1_S023_10L.m_R03", "D1_S02_2.5L.s_R01.1", "D1_S02_2.5L.s_R01.3", "D1_S02_10L.s_R02",
                   "D1_S023_10L.m_R02", "D1_S02_1L.s_R03", "D1_S02_10L.s_R03", "D1_S02_1L.s_R01",
                   "D1_S02_2.5L.s_R01.4", "D1_S02_10L.s_R01", "D1_S02_1L.s_R02", "D1_S02_2.5L.s_R01.2",
                   "D2_S023_1000L.m_R00", "D2_S023_100L.m_R11", "D1_S02_10L.m_R03", "D1_S023_100L.m_R01",
                   "D1_S023_716L.m_R00", "D1_S023_60L.m_R01", "D1_S023_10L.m_R01", "D1_S023_496L.m_R00",
                   "D1_S023_100L.m_R02.1", "D1_S023_100L.m_R03", "D1_S023_100L.m_R02", "D1_S023_60L.m_R03",
                   "D1_S320_716L.m_R00", "D1_S320_60L.m_R03", "D1_S320_60L.m_R01", "D1_S320_100L.m_R03",
                   "D1_S320_100L.m_R02", "D1_S320_100L.m_R01", "D1_S320_100L.m_R02.1", "D1_S320_10L.m_R03",
                   "D1_S320_10L.m_R01", "D1_S320_10L.m_R02")

simka_mtx_reordered <- reorder_mat(simka_mtx, Simka_samples)

Simka_groups <- c(rep("A", 11), rep("B", 2), rep("C", 1), rep("D", 1), rep("E", 25), rep("F", 4), rep("G", 3), rep("H", 3))

Simka_GroupedSamples <- data.frame(Simka_samples, Simka_groups, row.names = 1)

ANOSIM Test:

Simka_anosim <- anosim(simka_mtx_reordered, Simka_GroupedSamples$Simka_groups, permutations = 999, distance = "bray", NULL)
par(cex=1, mar=c(5, 5, 5, 5))
plot(Simka_anosim, main="SIMKA ANOSIM")
## Warning in bxp(structure(list(stats = structure(c(288, 580, 797, 1011, 1225, :
## some notches went outside hinges ('box'): maybe set notch=FALSE

summary(Simka_anosim) #ANOSIM significant
## 
## Call:
## anosim(x = simka_mtx_reordered, grouping = Simka_GroupedSamples$Simka_groups,      permutations = 999, distance = "bray", strata = NULL) 
## Dissimilarity: user supplied square matrix 
## 
## ANOSIM statistic R: 0.9669 
##       Significance: 0.001 
## 
## Permutation: free
## Number of permutations: 999
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.116 0.151 0.184 0.225 
## 
## Dissimilarity ranks between and within classes:
##          0%    25%   50%     75% 100%   N
## Between 288 580.00 797.0 1011.00 1225 857
## A       182 338.50 385.0  430.00  541  55
## B       416 416.00 416.0  416.00  416   1
## C        NA     NA    NA      NA   NA   0
## D        NA     NA    NA      NA   NA   0
## E         1  75.75 150.5  226.25  344 300
## F       280 331.50 338.0  350.50  391   6
## G       507 532.00 557.0  557.50  558   3
## H       686 690.50 695.0  703.00  711   3

ANOSIM significant

Gene tables analysis

Tables:

Reading the gene length normalized table:

gene_lnorm_cts <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_gene_lengthNorm_counts.tbl", header = TRUE, sep = "\t")

gene_lnorm_cts_t <- t(gene_lnorm_cts)

Creating the Bray-Curtis dissimilarity matrix

gene_lnorm_cts_bray <- vegdist(gene_lnorm_cts_t, method = "bray")

UPGMA dendogram

gene_lnorm_cts_hclust <- hclust(gene_lnorm_cts_bray, method = "average")
plot(gene_lnorm_cts_hclust, main = "GeneLengthNorm")

Heatmap

heatmap(as.matrix(gene_lnorm_cts_bray), Rowv = as.dendrogram(gene_lnorm_cts_hclust), Colv = "Rowv", symm = TRUE, 
        main = "GeneLengthNorm")

Reading the gene length + SCG (Single Copy Genes) normalized table

gene_scg <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_gene_lengthNorm_SingleCopyGeneNorm_counts.tbl", header = TRUE, sep = "\t")

gene_scg_t <- t(gene_scg)

Creating the Bray-Curtis dissimilarity matrix

gene_scg_bray <- vegdist(gene_scg_t, method = "bray")

UPGMA dendogram

gene_scg_hclust <- hclust(gene_scg_bray, method = "average")
plot(gene_scg_hclust, main = "SCG")

Heatmap

heatmap(as.matrix(gene_scg_bray), Rowv = as.dendrogram(gene_scg_hclust), Colv = "Rowv", symm = TRUE, 
        main = "SCG")

Reading the gene length + metagenome size normalized table

gene_metaGsize <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_gene.lengthNorm.metaGsizeGbNorm.counts.tbl", header = TRUE, sep = "\t")

gene_metaGsize_t <- t(gene_metaGsize)

Creating the Bray-Curtis dissimilarity matrix

gene_metaGsize_bray <- vegdist(gene_metaGsize_t, method = "bray")

UPGMA dendogram

gene_metaGsize_hclust <- hclust(gene_metaGsize_bray, method = "average")
plot(gene_metaGsize_hclust, main = "metaGsize")

Heatmap

heatmap(as.matrix(gene_metaGsize_bray), Rowv = as.dendrogram(gene_metaGsize_hclust), Colv = "Rowv", symm = TRUE, 
        main = "metaGsize")

Reading the gene TPM normalized table

gene_tpm <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_gene_TPM.tbl", header = TRUE, sep = "\t")

gene_tpm_t <- t(gene_tpm)

Creating the Bray-Curtis dissimilarity matrix

gene_tpm_bray <- vegdist(gene_tpm_t, method = "bray")

UPGMA dendogram

gene_tpm_hclust <- hclust(gene_tpm_bray, method = "average")
plot(gene_tpm_hclust, main = "TPM")

Heatmap

heatmap(as.matrix(gene_tpm_bray), Rowv = as.dendrogram(gene_tpm_hclust), Colv = "Rowv", symm = TRUE, 
        main = "TPM")

Mantel tests for the gene dissimilarity matrices

plot(gene_lnorm_cts_bray, gene_metaGsize_bray, main = "LNorm vs metaGsize")
abline(0,1)

mantel(gene_lnorm_cts_bray, gene_metaGsize_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_lnorm_cts_bray, ydis = gene_metaGsize_bray) 
## 
## Mantel statistic r: 0.9872 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0554 0.0760 0.0958 0.1299 
## Permutation: free
## Number of permutations: 999
plot(gene_lnorm_cts_bray, gene_scg_bray, main = "LNorm vs SCG")
abline(0,1)

mantel(gene_lnorm_cts_bray, gene_scg_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_lnorm_cts_bray, ydis = gene_scg_bray) 
## 
## Mantel statistic r: 0.9821 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0604 0.0831 0.1073 0.1340 
## Permutation: free
## Number of permutations: 999
plot(gene_lnorm_cts_bray, gene_tpm_bray, main = "LNorm vs TPM")
abline(0,1)

mantel(gene_lnorm_cts_bray, gene_tpm_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_lnorm_cts_bray, ydis = gene_tpm_bray) 
## 
## Mantel statistic r: 0.9872 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0625 0.0824 0.1030 0.1306 
## Permutation: free
## Number of permutations: 999
plot(gene_metaGsize_bray, gene_scg_bray, main = "metaGsize vs SCG")
abline(0,1)

mantel(gene_metaGsize_bray, gene_scg_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_metaGsize_bray, ydis = gene_scg_bray) 
## 
## Mantel statistic r: 0.9941 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0552 0.0844 0.1089 0.1392 
## Permutation: free
## Number of permutations: 999
plot(gene_metaGsize_bray, gene_tpm_bray, main = "metaGsize vs TPM")
abline(0,1)

mantel(gene_metaGsize_bray, gene_tpm_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_metaGsize_bray, ydis = gene_tpm_bray) 
## 
## Mantel statistic r:     1 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0552 0.0806 0.1015 0.1324 
## Permutation: free
## Number of permutations: 999
plot(gene_scg_bray, gene_tpm_bray, main = "SCG vs TPM")
abline(0,1)

mantel(gene_scg_bray, gene_tpm_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_scg_bray, ydis = gene_tpm_bray) 
## 
## Mantel statistic r: 0.9939 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0646 0.0882 0.1100 0.1458 
## Permutation: free
## Number of permutations: 999

Observations: Good correlation between gene distance matrices.

NMDS

gene_lnorm_nmds <- monoMDS(gene_lnorm_cts_bray)
gene_lnorm_nmds
## 
## Call:
## monoMDS(dist = gene_lnorm_cts_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = gene_lnorm_cts_t, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.06745559 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 57 iterations: Stress nearly unchanged (ratio > sratmax)
plot(gene_lnorm_nmds, main = "LNorm NMDS")

gene_metaGsize_nmds <- monoMDS(gene_metaGsize_bray)
gene_metaGsize_nmds
## 
## Call:
## monoMDS(dist = gene_metaGsize_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = gene_metaGsize_t, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.05147572 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 78 iterations: Stress nearly unchanged (ratio > sratmax)
plot(gene_metaGsize_nmds, main = "metaGsize NMDS")

gene_scg_nmds <- monoMDS(gene_scg_bray)
gene_scg_nmds
## 
## Call:
## monoMDS(dist = gene_scg_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = gene_scg_t, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.1024221 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 48 iterations: Stress nearly unchanged (ratio > sratmax)
plot(gene_scg_nmds, main = "SCG NMDS")

gene_tpm_nmds <- monoMDS(gene_tpm_bray)
gene_tpm_nmds
## 
## Call:
## monoMDS(dist = gene_tpm_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = gene_tpm_t, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.05219151 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 63 iterations: Stress nearly unchanged (ratio > sratmax)
plot(gene_tpm_nmds, main = "TPM NMDS")

miTags table (rRNA extracted from the metagenomes)

Table basted on 16S and 18S gene marker.

Reading the miTags table and making the Bray-Curtis dissimilaity matrix

mitags <- read.table(file = "/home/srllana/R/Tables/EMOSE_mitags_table97.txt", header = TRUE, sep = "\t", row.names = 1)
mitags_t <- t(mitags)
mitags_bray <- vegdist(mitags_t, method = "bray")

UPGMA dendogram

mitags_hclust <- hclust(mitags_bray, method = "average")
plot(mitags_hclust, main = "miTags dendogram")

Heatmap

heatmap(as.matrix(mitags_bray), Rowv = as.dendrogram(mitags_hclust), Colv = "Rowv", symm = TRUE, 
        main = "miTags heatmap")

NMDS

mitags_nmds <- monoMDS(mitags_bray)
mitags_nmds
## 
## Call:
## monoMDS(dist = mitags_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = mitags_t, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.1104755 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 60 iterations: Stress nearly unchanged (ratio > sratmax)
plot(mitags_nmds, main = "miTags NMDS")

Mantel tests Gene and Function Matrices VS Simka and Taxonomy Matrices

#Gene VS Simka matrices:
plot(gene_lnorm_cts_bray, simka_dis.mtx, main = "LNorm vs SIMKA")
abline(0,1)

mantel(gene_lnorm_cts_bray, simka_dis.mtx)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_lnorm_cts_bray, ydis = simka_dis.mtx) 
## 
## Mantel statistic r: 0.9106 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0801 0.1146 0.1432 0.1692 
## Permutation: free
## Number of permutations: 999
plot(gene_metaGsize_bray, simka_dis.mtx, main = "metaGsize vs SIMKA")
abline(0,1)

mantel(gene_metaGsize_bray, simka_dis.mtx)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_metaGsize_bray, ydis = simka_dis.mtx) 
## 
## Mantel statistic r: 0.9111 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0822 0.1098 0.1334 0.1680 
## Permutation: free
## Number of permutations: 999
plot(gene_scg_bray, simka_dis.mtx, main = "SCG vs SIMKA")
abline(0,1)

mantel(gene_scg_bray, simka_dis.mtx)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_scg_bray, ydis = simka_dis.mtx) 
## 
## Mantel statistic r: 0.9262 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0877 0.1146 0.1393 0.1856 
## Permutation: free
## Number of permutations: 999
plot(gene_tpm_bray, simka_dis.mtx, main = "TPM vs SIMKA")
abline(0,1)

mantel(gene_tpm_bray, simka_dis.mtx)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_tpm_bray, ydis = simka_dis.mtx) 
## 
## Mantel statistic r: 0.9108 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0782 0.1072 0.1277 0.1516 
## Permutation: free
## Number of permutations: 999
#Gene VS Taxonomy matrices:
plot(gene_lnorm_cts_bray, mitags_bray, main = "LNorm vs miTags")
abline(0,1)

mantel(gene_lnorm_cts_bray, mitags_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_lnorm_cts_bray, ydis = mitags_bray) 
## 
## Mantel statistic r: 0.9726 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0666 0.0910 0.1217 0.1503 
## Permutation: free
## Number of permutations: 999
plot(gene_metaGsize_bray, mitags_bray, main = "metaGsize vs miTags")
abline(0,1)

mantel(gene_metaGsize_bray, mitags_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_metaGsize_bray, ydis = mitags_bray) 
## 
## Mantel statistic r: 0.9593 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0611 0.0842 0.1096 0.1551 
## Permutation: free
## Number of permutations: 999
plot(gene_scg_bray, mitags_bray, main = "SCG vs miTags")
abline(0,1)

mantel(gene_scg_bray, mitags_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_scg_bray, ydis = mitags_bray) 
## 
## Mantel statistic r: 0.9622 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0724 0.0974 0.1278 0.1560 
## Permutation: free
## Number of permutations: 999
plot(gene_tpm_bray, mitags_bray, main = "TPM vs miTags")
abline(0,1)

mantel(gene_tpm_bray, mitags_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = gene_tpm_bray, ydis = mitags_bray) 
## 
## Mantel statistic r: 0.959 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0623 0.0904 0.1153 0.1372 
## Permutation: free
## Number of permutations: 999
#Function VS Simka matrices:
plot(GO_bray_dist, simka_dis.mtx, main = "GO vs SIMKA")
abline(0,1)

mantel(GO_bray_dist, simka_dis.mtx)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = GO_bray_dist, ydis = simka_dis.mtx) 
## 
## Mantel statistic r: -0.007522 
##       Significance: 0.495 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0982 0.1269 0.1765 0.2052 
## Permutation: free
## Number of permutations: 999
plot(GOslim_bray_dist, simka_dis.mtx, main = "GO-slim vs SIMKA")
abline(0,1)

mantel(GOslim_bray_dist, simka_dis.mtx)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = GOslim_bray_dist, ydis = simka_dis.mtx) 
## 
## Mantel statistic r: -0.01509 
##       Significance: 0.569 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.100 0.131 0.157 0.200 
## Permutation: free
## Number of permutations: 999
plot(IPR_bray_dist, simka_dis.mtx, main = "IPR vs SIMKA")
abline(0,1)

mantel(IPR_bray_dist, simka_dis.mtx)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = IPR_bray_dist, ydis = simka_dis.mtx) 
## 
## Mantel statistic r: 0.003809 
##       Significance: 0.455 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.103 0.138 0.161 0.189 
## Permutation: free
## Number of permutations: 999
#Function VS Taxonomy matrices:
plot(GO_bray_dist, mitags_bray, main = "GO vs miTags")
abline(0,1)

mantel(GO_bray_dist, mitags_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = GO_bray_dist, ydis = mitags_bray) 
## 
## Mantel statistic r: -0.004651 
##       Significance: 0.472 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0844 0.1159 0.1439 0.1830 
## Permutation: free
## Number of permutations: 999
plot(GOslim_bray_dist, mitags_bray, main = "GO-slim vs miTags")
abline(0,1)

mantel(GOslim_bray_dist, mitags_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = GOslim_bray_dist, ydis = mitags_bray) 
## 
## Mantel statistic r: -0.01179 
##       Significance: 0.541 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0995 0.1311 0.1617 0.2030 
## Permutation: free
## Number of permutations: 999
plot(IPR_bray_dist, mitags_bray, main = "IPR vs miTags")
abline(0,1)

mantel(IPR_bray_dist, mitags_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = IPR_bray_dist, ydis = mitags_bray) 
## 
## Mantel statistic r: 0.005682 
##       Significance: 0.447 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.100 0.138 0.170 0.199 
## Permutation: free
## Number of permutations: 999

Observations: - Good correlation between Gene & Simka and Gene & miTags distance matrices - Bad correlation between Function & Simka and Function & miTags distance matrices